#################################
# Figure 1
#################################

rm(list=ls())

library(copula)
library(ggplot2)
library(ggExtra)

set.seed(123456)

N <- 3000
norm.cop <- normalCopula(0)
draws1 <- data.frame( 100*rCopula(N, norm.cop) )
names(draws1) <- c("SET Score Percentile", "Instructor Quality Percentile")

p <- ggplot(draws1, aes(`SET Score Percentile`, `Instructor Quality Percentile`)) + geom_point(color="darkgray", alpha=0.25) + theme_classic() + theme(panel.border = element_rect(fill=NA))
q <- ggMarginal(p, type = "histogram", fill="gray", xparams = list(size=0.2), yparams = list(size=0.2))
ggsave(filename="correlation-zero.pdf", plot=q, width=3, height=(2.8 / 5.78) * 6.04, units="in")




rm(list=ls())
N <- 3000
norm.cop <- normalCopula(0.4)
draws1 <- data.frame( 100*rCopula(N, norm.cop) )
names(draws1) <- c("SET Score Percentile", "Instructor Quality Percentile")

p <- ggplot(draws1, aes(`SET Score Percentile`, `Instructor Quality Percentile`)) + geom_point(color="darkgray", alpha=0.25) + theme_classic() + theme(panel.border = element_rect(fill=NA))
q <- ggMarginal(p, type = "histogram", fill="gray", xparams = list(size=0.2), yparams = list(size=0.2))
ggsave(filename="correlation-mod.pdf", plot=q, width=3, height=(2.8 / 5.78) * 6.04, units="in")




rm(list=ls())
N <- 3000
norm.cop <- normalCopula(0.9)
draws1 <- data.frame( 100*rCopula(N, norm.cop) )
names(draws1) <- c("SET Score Percentile", "Instructor Quality Percentile")

p <- ggplot(draws1, aes(`SET Score Percentile`, `Instructor Quality Percentile`)) + geom_point(color="darkgray", alpha=0.25) + theme_classic() + theme(panel.border = element_rect(fill=NA))
q <- ggMarginal(p, type = "histogram", fill="gray", xparams = list(size=0.2), yparams = list(size=0.2))
ggsave(filename="correlation-high.pdf", plot=q, width=3, height=(2.8 / 5.78) * 6.04, units="in")


#################################
# Appendix Figure 8
#################################

rm(list=ls())

library(mvtnorm)
library(ggplot2)
library(ggExtra)

set.seed(123456)

N <- 3000
Sigma = matrix(data=c(1, 0, 0, 1), nrow=2, ncol=2)
draws1 <- data.frame( rmvnorm(N, mean=c(0,0), sigma=Sigma) )
names(draws1) <- c("SET Score", "Instructor Quality Metric")

p <- ggplot(draws1, aes(`SET Score`, `Instructor Quality Metric`)) + geom_point(color="darkgray", alpha=0.25) + theme_classic() + theme(panel.border = element_rect(fill=NA))
q <- ggMarginal(p, type = "histogram", fill="gray", xparams = list(size=0.2), yparams = list(size=0.2))
ggsave(filename="correlation-normal-zero.pdf", plot=q, width=3, height=(2.8 / 5.78) * 6.04, units="in")




rm(list=ls())
N <- 3000
Sigma = matrix(data=c(1, 0.4, 0.4, 1), nrow=2, ncol=2)
draws1 <- data.frame( rmvnorm(N, mean=c(0,0), sigma=Sigma) )
names(draws1) <- c("SET Score", "Instructor Quality Metric")

p <- ggplot(draws1, aes(`SET Score`, `Instructor Quality Metric`)) + geom_point(color="darkgray", alpha=0.25) + theme_classic() + theme(panel.border = element_rect(fill=NA))
q <- ggMarginal(p, type = "histogram", fill="gray", xparams = list(size=0.2), yparams = list(size=0.2))
ggsave(filename="correlation-normal-mod.pdf", plot=q, width=3, height=(2.8 / 5.78) * 6.04, units="in")




rm(list=ls())
N <- 3000
Sigma = matrix(data=c(1, 0.9, 0.9, 1), nrow=2, ncol=2)
draws1 <- data.frame( rmvnorm(N, mean=c(0,0), sigma=Sigma) )
names(draws1) <- c("SET Score", "Instructor Quality Metric")


p <- ggplot(draws1, aes(`SET Score`, `Instructor Quality Metric`)) + geom_point(color="darkgray", alpha=0.25) + theme_classic() + theme(panel.border = element_rect(fill=NA))
q <- ggMarginal(p, type = "histogram", fill="gray", xparams = list(size=0.2), yparams = list(size=0.2))
ggsave(filename="correlation-normal-high.pdf", plot=q, width=3, height=(2.8 / 5.78) * 6.04, units="in")




#################################
# Appendix Figure 9
#################################

rm(list=ls())

library(mvtnorm)
set.seed(123456)

N <- 500000
rho <- seq(0.01, 0.99, 0.01)
error.rate <- c()
improvement <- c()

pb <- txtProgressBar(0, length(rho), style=3)
for(i in seq_along(rho)){
  
  setTxtProgressBar(pb, i
                    )
  sigma.mat <- matrix(data=c(1, rho[i], rho[i], 1), nrow=2, ncol=2, byrow=T)
  draws1 <- rmvnorm(N, mean=c(0,0), sigma=sigma.mat)
  draws2 <- rmvnorm(N, mean=c(0,0), sigma=sigma.mat)
  
  #eval.m <- draws1[,1] > draws2[,1]
  #effect.m <- draws1[,2] > draws2[,2]
  correct1 <- (draws1[,1] > draws2[,1]) & (draws1[,2] > draws2[,2])
  correct2 <- (draws1[,1] < draws2[,1]) & (draws1[,2] < draws2[,2])
  correct3 <- (draws1[,1] == draws2[,1]) & (draws1[,2] == draws2[,2])
  
  correct <- correct1 + correct2 + correct3
  error.rate[i] <- 100*(1 - sum(correct)/N)
  improvement[i] <- (sum(correct)/N - 0.5)/0.5
  
}
close(pb)

pdf(file="error-rate.pdf", width=5, height=5.5)

par(mar=c(5,5,4,2))

scatter.smooth(error.rate ~ rho,
               xlab = c("correlation between true instructor quality", "and average student evaluation"),
               ylab = "", #"error rate (% of time higher quality teacher has lower average evaluation)",
               span = 0.2, degree=2, col="darkgray"
)

mtext(text = "error rate (% of time higher quality instructor", side=2, line=4)
mtext(text = "has lower average evaluation)", side=2, line=3)

clip(-1,.4,-1,error.rate[rho==0.4])
abline(v=0.4, col="darkgray", lty=2)
abline(h=error.rate[rho==0.4], col="darkgray", lty=2)
clip(-1,0.9,-1,error.rate[rho==0.9])
abline(v=0.9, col="darkgray", lty=2)
abline(h=error.rate[rho==0.9], col="darkgray", lty=2)
box()


dev.off()


#################################
# Appendix Figure 10
#################################


rm(list=ls())

library(mvtnorm)
set.seed(123456)

N <- 500000
rho <- 0.4
gap <- seq(from=0, to=2, by=0.01)
error.rate <- c()
decision.rate <- c()

pb <- txtProgressBar(0, length(gap), style=3)
for(i in seq_along(gap)){
  
  setTxtProgressBar(pb, i)
  sigma.mat <- matrix(data=c(1, rho, rho, 1), nrow=2, ncol=2, byrow=T)
  draws1 <- rmvnorm(N, mean=c(0,0), sigma=sigma.mat)
  draws2 <- rmvnorm(N, mean=c(0,0), sigma=sigma.mat)
  
  eval.m <- ifelse(draws1[,1] - draws2[,1] > gap[i], 1,
                   ifelse( draws2[,1] - draws1[,1] > gap[i], -1, 0))
  #effect.m <- draws1[,2] > draws2[,2]
  
  decision <- eval.m != 0
  incorrect <- (eval.m == 1 & (draws1[,2] < draws2[,2])) | (eval.m == -1 & (draws1[,2] > draws2[,2]))
  error.rate[i] <- 100*(sum(incorrect)/N)
  decision.rate[i] <- 100*(sum(decision)/N)

}

close(pb)

pdf(file="gap-error.pdf", width=5.5, height=5.85)

par(mar=c(5,5,4,2))
scatter.smooth(error.rate ~ gap,
               xlab = c("minimum diff. in average evaluation scores","(in percentiles) required for decision"),
               ylab = "",
               span = 0.2, degree=2, col="darkgray", ylim=c(0,40)
)
text(1.15, 40, labels = paste("correlation between avg. evaluation and quality = ", rho, sep=""),
     cex=0.8)
mtext(text = "error rate (% of time better teacher has average evaluation",
      side=2, line=4)
mtext(text = "lower by more than the percentile gap on the x-axis)",
      side=2, line=3)

x1 <- gap[which(abs(10-error.rate) == min(abs(10-error.rate)))]
x2 <- gap[which(abs(5-error.rate) == min(abs(5-error.rate)))]
clip(-1,x1,-10,10)
abline(v=x1, col="darkgray", lty=2)
abline(h=10, col="darkgray", lty=2)
clip(-1,x2,-10,5)
abline(v=x2, col="darkgray", lty=2)
abline(h=5, col="darkgray", lty=2)
box()

dev.off()

gap[which(abs(10-error.rate) == min(abs(10-error.rate)))]
error.rate[which(abs(10-error.rate) == min(abs(10-error.rate)))]

gap[which(abs(5-error.rate) == min(abs(5-error.rate)))]
error.rate[which(abs(5-error.rate) == min(abs(5-error.rate)))]

pdf(file="gap-decision.pdf", width=5, height=5.5)

par(mar=c(5,5,4,2))
scatter.smooth(decision.rate ~ gap,
               xlab = "diff. in average evaluation scores (in standard deviations)",
               ylab = "",
               span = 0.2, degree=2, col="darkgray"
)
text(1.4, 100, labels = paste("correlation between avg. evaluation and quality = ", rho, sep=""),
     cex=0.8)
mtext(text = "decision rate (% of time teacher has average evaluation",
      side=2, line=4)
mtext(text = "that is lower by number of points indicated on x-axis)",
      side=2, line=3)

dev.off()




#################################
# Figure 2
#################################


rm(list=ls())

library(copula)
set.seed(123456)

N <- 500000
rho <- seq(0.01, 0.99, 0.01)
error.rate <- c()
improvement <- c()

pb <- txtProgressBar(0, length(rho), style=3)
for(i in seq_along(rho)){
  
  setTxtProgressBar(pb, i)
  norm.cop <- normalCopula(rho[i])
  draws1 <- rCopula(N, norm.cop)
  draws2 <- rCopula(N, norm.cop)
  
  #eval.m <- draws1[,1] > draws2[,1]
  #effect.m <- draws1[,2] > draws2[,2]
  correct1 <- (draws1[,1] > draws2[,1]) & (draws1[,2] > draws2[,2])
  correct2 <- (draws1[,1] < draws2[,1]) & (draws1[,2] < draws2[,2])
  correct3 <- (draws1[,1] == draws2[,1]) & (draws1[,2] == draws2[,2])
  
  correct <- correct1 + correct2 + correct3
  error.rate[i] <- 100*(1 - sum(correct)/N)
  improvement[i] <- (sum(correct)/N - 0.5)/0.5
  
}
close(pb)

pdf(file="copula-error-rate.pdf", width=5, height=5.5)

par(mar=c(5,5,4,2))

scatter.smooth(error.rate ~ rho,
               xlab = c("correlation between true instructor quality", "and average student evaluation"),
               #ylab = c("error rate (% of time higher quality instructor", "has lower average evaluation"),
               ylab= "",
               span = 0.2, degree=2, col="darkgray"
)
mtext(text = "error rate (% of time higher quality instructor", side=2, line=4)
mtext(text = "has lower average evaluation)", side=2, line=3)

clip(-1,.4,-1,error.rate[rho==0.4])
abline(v=0.4, col="darkgray", lty=2)
abline(h=error.rate[rho==0.4], col="darkgray", lty=2)
clip(-1,0.9,-1,error.rate[rho==0.9])
abline(v=0.9, col="darkgray", lty=2)
abline(h=error.rate[rho==0.9], col="darkgray", lty=2)
box()

dev.off()

error.rate[rho==0.4]
error.rate[rho==0.9]


#################################
# Figure 3
#################################


rm(list=ls())

library(copula)
set.seed(123456)

N <- 500000
rho <- 0.4
gap <- seq(from=0, to=1, by=0.005)
error.rate <- c()
decision.rate <- c()

pb <- txtProgressBar(0, length(gap), style=3)
for(i in seq_along(gap)){
  
  setTxtProgressBar(pb, i)
  norm.cop <- normalCopula(rho)
  draws1 <- rCopula(N, norm.cop)
  draws2 <- rCopula(N, norm.cop)
  
  eval.m <- ifelse(draws1[,1] - draws2[,1] > gap[i], 1,
                   ifelse( draws2[,1] - draws1[,1] > gap[i], -1, 0))
  #effect.m <- draws1[,2] > draws2[,2]
  
  decision <- eval.m != 0
  incorrect <- (eval.m == 1 & (draws1[,2] < draws2[,2])) | (eval.m == -1 & (draws1[,2] > draws2[,2]))
  error.rate[i] <- 100*(sum(incorrect)/N)
  decision.rate[i] <- 100*(sum(decision)/N)
  
}

close(pb)

gap <- 100*gap

pdf(file="copula-gap-error.pdf", width=5.5, height=5.85)

par(mar=c(5,5,4,2))
scatter.smooth(error.rate ~ gap,
               xlab = c("minimum diff. in average evaluation scores","(in percentiles) required for decision"),
               ylab = "",
               span = 0.2, degree=2, col="darkgray"
)
text(57.5, 37, labels = paste("correlation between avg. evaluation and quality = ", rho, sep=""),
     cex=0.8)
mtext(text = "error rate (% of time better teacher has average evaluation",
      side=2, line=4)
mtext(text = "lower by more than the percentile gap on the x-axis)",
      side=2, line=3)

x1 <- gap[which(abs(10-error.rate) == min(abs(10-error.rate)))]
x2 <- gap[which(abs(5-error.rate) == min(abs(5-error.rate)))]
clip(-10,x1,-10,10)
abline(v=x1, col="darkgray", lty=2)
abline(h=10, col="darkgray", lty=2)
clip(-10,x2,-10,5)
abline(v=x2, col="darkgray", lty=2)
abline(h=5, col="darkgray", lty=2)
box()


dev.off()

gap[which(abs(10-error.rate) == min(abs(10-error.rate)))]
error.rate[which(abs(10-error.rate) == min(abs(10-error.rate)))]

gap[which(abs(5-error.rate) == min(abs(5-error.rate)))]
error.rate[which(abs(5-error.rate) == min(abs(5-error.rate)))]

decision.rate[which(abs(5-error.rate) == min(abs(5-error.rate)))]




#################################
# Figure 4
#################################

rm(list=ls())

library(copula)
set.seed(123456)

N <- 1000000
rho <- 0.4
gap <- 0
error.rate <- c()
decision.rate <- c()

norm.cop <- normalCopula(rho)
draws1 <- rCopula(N, norm.cop)

scores <- apply(X=draws1, FUN=quantile, MARGIN=2, probs=seq(0.05, 0.95, 0.05))

# proportion of faculty with evaluations below 20th percentile
# who are above-average quality teachers
1-ecdf(draws1[draws1[,1]<=scores["20%",1],2])(0.5)

pdf(file="copula-probability-density.pdf", width=5.5, height=5.85)

par(mar=c(5,5,4,2))

y.r <- (draws1[draws1[,1]<=scores["20%",1],2])*100
hist(y.r, freq=F, breaks=10, ylim=c(0, 0.024), yaxt='n',
     #main = c("distribution of teacher quality", "student evaluations at or below 20th percentile"),
     main="", col= c(rep("white", 5), rep("gray", 5)),
     xlab = "percentile of teacher quality", ylab="")
axis(2, at=seq(0, 0.024, 0.004), las=1, labels=seq(0, 0.24, 0.04))
mtext(text="density (proportion of faculty in bin)", side=2, line=4)
legend("topright", fill=c("white", "gray"), 
       legend=c("at or below median quality", "above median quality"))

dev.off()


# proportion of faculty with evaluations above 80th percentile
# who are at or below the median in quality
ecdf(draws1[draws1[,1]>scores["80%",1],2])(0.5)

# proportion of faculty with evaluations at or below 20th percentile
# who are above the median in quality
1 - ecdf(draws1[draws1[,1]<=scores["20%",1],2])(0.5)

# proportion of faculty with evaluations above 95th percentile
# who are at or below the median in quality
ecdf(draws1[draws1[,1]>scores["95%",1],2])(0.5)

# proportion of faculty with evaluations at or below 5th percentile
# who are above the median in quality
1 - ecdf(draws1[draws1[,1]<=scores["5%",1],2])(0.5)


#################################
# Appendix Figure 7
#################################

pdf(file="copula-probability-density-highrate.pdf", width=5.5, height=5.85)

par(mar=c(5,5,4,2))

y.r <- (draws1[draws1[,1]>scores["95%",1],2])*100
hist(y.r, freq=F, breaks=10, ylim=c(0, 0.032), yaxt='n',
     #main = c("distribution of teacher quality", "student evaluations above 50th percentile"),
     main="", col= c(rep("white", 5), rep("gray", 5)),
     xlab = "percentile of teacher quality", ylab="")
axis(2, at=seq(0, 0.032, 0.004), las=1, labels=seq(0, 0.32, 0.04))
mtext(text="density (proportion of faculty in bin)", side=2, line=4)
legend(x=5, y=0.023, fill=c("white", "gray"), 
       legend=c("at or below median quality", "above median quality"))

dev.off()




#################################
# Appendix Figure 11
#################################

rm(list=ls())

library(mvtnorm)
set.seed(123456)

N <- 1000000
rho <- 0.4
gap <- 0

sigma.mat <- matrix(data=c(1, rho, rho, 1), nrow=2, ncol=2, byrow=T)
draws1 <- rmvnorm(N, mean=c(0,0), sigma=sigma.mat)
scores <- apply(X=draws1, FUN=quantile, MARGIN=2, probs=seq(0.05, 0.95, 0.05))

# proportion of faculty with evaluations below 20th percentile
# who are above-average quality teachers
1-ecdf(draws1[draws1[,1]<=scores["20%",1],2])(0)

pdf(file="probability-density.pdf", width=5.5, height=5.85)

par(mar=c(5,5,4,2))

y.r <- (draws1[draws1[,1]<=scores["20%",1],2])
hist(y.r, freq=F, ylim=c(0, 0.5), yaxt='n',
     #main = c("distribution of teacher quality", "student evaluations below 20th percentile"),
     main="", col= c(rep("white", 12), rep("gray", 9)),
     xlab = "standardized teacher quality metric", ylab="")

legend("topright", fill=c("white", "gray"), 
       legend=c("at or below median quality", "above median quality"))

axis(2, at=seq(0, 0.55, 0.05), las=1, labels=seq(0, 0.55, 0.05))
mtext(text="probability density", side=2, line=4)

dev.off()


# proportion of faculty with evaluations above 80th percentile
# who are at or below the median in quality
ecdf(draws1[draws1[,1]>scores["80%",1],2])(0)

# proportion of faculty with evaluations at or below 20th percentile
# who are above the median in quality
1 - ecdf(draws1[draws1[,1]<=scores["20%",1],2])(0)

# proportion of faculty with evaluations above 95th percentile
# who are at or below the median in quality
ecdf(draws1[draws1[,1]>scores["95%",1],2])(qnorm(0.50))

# proportion of faculty with evaluations at or below 5th percentile
# who are above the median in quality
1 - ecdf(draws1[draws1[,1]<=scores["5%",1],2])(qnorm(0.50))

#################################
# Appendix Figure 12
#################################

pdf(file="probability-density-highrate.pdf", width=5.5, height=5.85)

par(mar=c(5,5,4,2))

y.r <- (draws1[draws1[,1]>scores["95%",1],2])
hist(y.r, freq=F, ylim=c(0, 0.55), yaxt='n',
     #main = c("distribution of teacher quality", "student evaluations below 20th percentile"),
     main="", col= c(rep("white", 8), rep("gray", 11)),
     xlab = "standardized teacher quality metric", ylab="")

legend("topright", fill=c("white", "gray"), 
       legend=c("at or below median quality", "above median quality"))

axis(2, at=seq(0, 0.55, 0.05), las=1, labels=seq(0, 0.55, 0.05))
mtext(text="probability density", side=2, line=4)

dev.off()




#################################
# Figure 6
#################################


rm(list=ls())

library(mvtnorm)
set.seed(123456)

draws1 <- rmvnorm(9000, mean=c(0.2,0.2), sigma=matrix(data=c(0.3,0,0,1.3), ncol=2, nrow=2))
draws2 <- rmvnorm(1000, mean=c(-2,-2), sigma=matrix(data=c(0.1,0,0,0.1), ncol=2, nrow=2))
draws <- rbind(draws1, draws2)

cor(draws)
draws[,1] <- draws[,1] - mean(draws[,1])
draws[,1] <- draws[,1] / sd(draws[,1])
cor(draws)
mean(draws[,1])
mean(draws[,2])
sd(draws[,1])
sd(draws[,2])

pdf(file="bimodal-cor.pdf", width=5, height=5.85)

plot(draws, cex=0.5, col=gray(0.6, alpha=0.25),
     xlab="standardized SET score", ylab="raw faculty quality metric",
     xlim=c(-3.5, 3.5), ylim=c(-6.5,6.5))
q1 <- quantile(ecdf(draws[,1]), 0.1)
q1
abline(v=q1, lty=2)
abline(h=0, lty=2)

dev.off()

sel <- draws[,1] < q1
1 - mean( draws[sel==1,2] <= 0 )

vcv <- cov(draws)

draws <- rmvnorm(10000, mean=c(0,0), sigma=vcv)
cor(draws)
mean(draws[,1])
mean(draws[,2])
sd(draws[,1])
sd(draws[,2])

pdf(file="unimodal-cor.pdf", width=5, height=5.85)

plot(draws, cex=0.5, col=gray(0.6, alpha=0.25),
     xlab="standardized SET score", ylab="raw faculty quality metric",
     xlim=c(-3.5, 3.5), ylim=c(-6.5,6.5))
q2 <- quantile(ecdf(draws[,1]), 0.1)
q2
abline(v=q2, lty=2)
abline(h=0, lty=2)

dev.off()

sel <- draws[,1] < q2
1 - mean( draws[sel==1,2] <= 0 )








#################################
# Figure 5
#################################

rm(list=ls())

library(mvtnorm)
library(copula)
library(gplots)

set.seed(123456)
mcl.vec <- seq(from = 0, to = 0.9, by = 0.1)
reps <- 10000
N <- 1000
rho <- 0.4
accuracy.4 <- c()
ui.4.v <- c()
li.4.v <- c()
accuracy.3 <- c()
ui.3.v <- c()
li.3.v <- c()
accuracy.2 <- c()
ui.2.v <- c()
li.2.v <- c()


for(j in 1:length(mcl.vec)){
 
  mcl <- mcl.vec[j]
  cat("Correlation = ", mcl.vec[j], "\n", "\n")
  
  cor.store.4 <- c()
  cor.store.3 <- c()
  cor.store.2 <- c()
  
  pb <- txtProgressBar(min=1, max=reps, initial=0, style=3)
   
  for(i in 1:reps){
    
    setTxtProgressBar(pb, i)
  
    vcv <- diag(5)
    vcv[1,] <- c(1, rho, rho, rho, rho)
    vcv[2,] <- c(rho, 1, mcl, mcl, mcl)
    vcv[3,] <- c(rho, mcl, 1, mcl, mcl)
    vcv[4,] <- c(rho, mcl, mcl, 1, mcl)
    vcv[5,] <- c(rho, mcl, mcl, mcl, 1)
    
    norm.cop <- normalCopula(P2p(vcv), dim=5, dispstr="un")
    draws <- rCopula(N, norm.cop)
    colnames(draws) <- c("true.q", "s1", "s2", "s3", "s4")
    
    dat <- data.frame(draws)
    
    dat.s <- subset(dat, select=(-true.q))
    pred.q <- apply(X=dat.s, FUN=mean, MARGIN=1)
    cor.store.4[i] <- cor(pred.q, dat$true.q)
    
    dat.s <- subset(dat, select=(-c(true.q, s4)))
    pred.q <- apply(X=dat.s, FUN=mean, MARGIN=1)
    cor.store.3[i] <- cor(pred.q, dat$true.q)
    
    dat.s <- subset(dat, select=(-c(true.q, s4, s3)))
    pred.q <- apply(X=dat.s, FUN=mean, MARGIN=1)
    cor.store.2[i] <- cor(pred.q, dat$true.q)
    
    
  
  }
  
  accuracy.4[j] <- mean(cor.store.4) 
  ui.4.v[j] <- quantile(cor.store.4, probs=0.975)
  li.4.v[j] <- quantile(cor.store.4, probs=0.025)
  
  accuracy.3[j] <- mean(cor.store.3) 
  ui.3.v[j] <- quantile(cor.store.3, probs=0.975)
  li.3.v[j] <- quantile(cor.store.3, probs=0.025)
  
  accuracy.2[j] <- mean(cor.store.2) 
  ui.2.v[j] <- quantile(cor.store.2, probs=0.975)
  li.2.v[j] <- quantile(cor.store.2, probs=0.025)
  
  close(pb)
  
}

plotCI(x = mcl.vec+0.03, y = accuracy.4, ui=ui.4.v, li=li.4.v,
       xlab = "collinearity among noisy measures",
       ylab = "correlation of average with quality", 
       xlim=c(-0.05, 1.05), ylim=c(0.3, 0.85), sfrac=0.005)

plotCI(x = mcl.vec, y = accuracy.3, ui=ui.3.v, li=li.3.v,
       add=T, sfrac=0.005)

plotCI(x = mcl.vec-0.03, y = accuracy.2, ui=ui.2.v, li=li.2.v,
       add=T, sfrac=0.005)

pdf(file="averaged-measure.pdf", width=5, height=5.85)

plot(accuracy.4 ~ mcl.vec,
     xlab = "collinearity among noisy measures",
     ylab = "correlation between averaged measure and instructor quality", 
     xlim=c(-0.05, 1.05), ylim=c(0.38, 0.8), type="l")
#lines(ui.4.v ~ mcl.vec, lty=2)
#lines(li.4.v ~ mcl.vec, lty=2)

lines(accuracy.3 ~ mcl.vec, col=gray(0.6), lty=2)
#lines(ui.3.v ~ mcl.vec, lty=2, col=gray(0.7))
#lines(li.3.v ~ mcl.vec, lty=2, col=gray(0.7))

lines(accuracy.2 ~ mcl.vec, col=gray(0.8), lwd=2)
#lines(ui.2.v ~ mcl.vec, lty=2, col=gray(0.6))
#lines(li.2.v ~ mcl.vec, lty=2, col=gray(0.6))

legend("topright", lty=c(1,2,1), lwd=c(1,1,2), col=c("black", gray(0.6), gray(0.8)), legend = c("4 measures", "3 measures", "2 measures"))

dev.off()






#################################
# Appendix Figure 13
#################################

rm(list=ls())

library(mvtnorm)
library(copula)
library(gplots)

set.seed(123456)
mcl.vec <- seq(from = 0, to = 0.9, by = 0.1)
reps <- 10000
N <- 1000
rho <- 0.4
accuracy.4 <- c()
ui.4.v <- c()
li.4.v <- c()
accuracy.3 <- c()
ui.3.v <- c()
li.3.v <- c()
accuracy.2 <- c()
ui.2.v <- c()
li.2.v <- c()


for(j in 1:length(mcl.vec)){
  
  mcl <- mcl.vec[j]
  cat("Correlation = ", mcl.vec[j], "\n", "\n")
  
  cor.store.4 <- c()
  cor.store.3 <- c()
  cor.store.2 <- c()
  
  pb <- txtProgressBar(min=1, max=reps, initial=0, style=3)
  
  for(i in 1:reps){
    
    setTxtProgressBar(pb, i)
    
    vcv <- diag(5)
    vcv[1,] <- c(1, rho, rho, rho, rho)
    vcv[2,] <- c(rho, 1, mcl, mcl, mcl)
    vcv[3,] <- c(rho, mcl, 1, mcl, mcl)
    vcv[4,] <- c(rho, mcl, mcl, 1, mcl)
    vcv[5,] <- c(rho, mcl, mcl, mcl, 1)
    
    #norm.cop <- normalCopula(P2p(vcv), dim=5, dispstr="un")
    #draws <- rCopula(N, norm.cop)
    draws <- rmvnorm(N, mean=rep(0,5), sigma=vcv)
    colnames(draws) <- c("true.q", "s1", "s2", "s3", "s4")
    
    dat <- data.frame(draws)
    
    dat.s <- subset(dat, select=(-true.q))
    pred.q <- apply(X=dat.s, FUN=mean, MARGIN=1)
    cor.store.4[i] <- cor(pred.q, dat$true.q)
    
    dat.s <- subset(dat, select=(-c(true.q, s4)))
    pred.q <- apply(X=dat.s, FUN=mean, MARGIN=1)
    cor.store.3[i] <- cor(pred.q, dat$true.q)
    
    dat.s <- subset(dat, select=(-c(true.q, s4, s3)))
    pred.q <- apply(X=dat.s, FUN=mean, MARGIN=1)
    cor.store.2[i] <- cor(pred.q, dat$true.q)
    
    
    
  }
  
  accuracy.4[j] <- mean(cor.store.4) 
  ui.4.v[j] <- quantile(cor.store.4, probs=0.975)
  li.4.v[j] <- quantile(cor.store.4, probs=0.025)
  
  accuracy.3[j] <- mean(cor.store.3) 
  ui.3.v[j] <- quantile(cor.store.3, probs=0.975)
  li.3.v[j] <- quantile(cor.store.3, probs=0.025)
  
  accuracy.2[j] <- mean(cor.store.2) 
  ui.2.v[j] <- quantile(cor.store.2, probs=0.975)
  li.2.v[j] <- quantile(cor.store.2, probs=0.025)
  
  close(pb)
  
}

plotCI(x = mcl.vec+0.03, y = accuracy.4, ui=ui.4.v, li=li.4.v,
       xlab = "collinearity among noisy measures",
       ylab = "correlation of average with quality", 
       xlim=c(-0.05, 1.05), ylim=c(0.3, 0.85), sfrac=0.005)

plotCI(x = mcl.vec, y = accuracy.3, ui=ui.3.v, li=li.3.v,
       add=T, sfrac=0.005)

plotCI(x = mcl.vec-0.03, y = accuracy.2, ui=ui.2.v, li=li.2.v,
       add=T, sfrac=0.005)

pdf(file="averaged-measure-normal.pdf", width=5, height=5.85)

plot(accuracy.4 ~ mcl.vec,
     xlab = "collinearity among noisy measures",
     ylab = "correlation between averaged measure and instructor quality", 
     xlim=c(-0.05, 1.05), ylim=c(0.38, 0.8), type="l")
#lines(ui.4.v ~ mcl.vec, lty=2)
#lines(li.4.v ~ mcl.vec, lty=2)

lines(accuracy.3 ~ mcl.vec, col=gray(0.6), lty=2)
#lines(ui.3.v ~ mcl.vec, lty=2, col=gray(0.7))
#lines(li.3.v ~ mcl.vec, lty=2, col=gray(0.7))

lines(accuracy.2 ~ mcl.vec, col=gray(0.8), lwd=2)
#lines(ui.2.v ~ mcl.vec, lty=2, col=gray(0.6))
#lines(li.2.v ~ mcl.vec, lty=2, col=gray(0.6))

legend("topright", lty=c(1,2,1), lwd=c(1,1,2), col=c("black", gray(0.6), gray(0.8)), legend = c("4 measures", "3 measures", "2 measures"))

dev.off()

